# Run Merge Analysis for Robustness Checks
# Alexander F. Gazmararian
# agazmararian@gmail.com

library(tidyverse)
library(here)
library(tidylog)

# Tell merge.R not to run its merge automatically when it is sourced.
# The flag must be assigned BEFORE source(): code in merge.R is evaluated
# during the source() call, so a flag defined afterwards cannot influence it.
# NOTE(review): presumably merge.R checks for SKIP_MERGE_EXECUTION in the
# calling (global) environment -- confirm against merge.R.
SKIP_MERGE_EXECUTION <- TRUE

# Load the merge function (merge_statements_covariates)
source(here("analysis", "statements", "merge.R"))

# Annotation versions to compare: check name -> annotations CSV file name.
# The names drive the loop below and the naming of the output files.
ROBUSTNESS_CHECKS <- c(
  primary = "annotated_statements.csv",
  codebook_robustness = "annotated_statements_codebook_robustness.csv"
)

# Build the output CSV path for one annotation version.
#
# annotation_type: single string naming the check (e.g. "primary").
# Returns a path under data/output built with here(): the "primary" check
# maps to statements_analysis.csv, any other name to
# statements_analysis_<annotation_type>.csv.
create_output_filename <- function(annotation_type) {
  file_name <- if (annotation_type == "primary") {
    "statements_analysis.csv"
  } else {
    sprintf("statements_analysis_%s.csv", annotation_type)
  }

  here("data", "output", file_name)
}

# Merge each configured annotation version in turn. A version whose
# annotations file is missing is skipped, and an error in one version is
# reported without stopping the others; only versions that complete are
# stored in `results`, keyed by check name.
message("=== RUNNING MERGE FOR MULTIPLE ANNOTATION VERSIONS ===")

results <- list()

for (check_name in names(ROBUSTNESS_CHECKS)) {
  message(sprintf("\n--- Processing %s annotations ---", check_name))

  # Input is read from data/inter; the output path depends on the check name
  annotations_file <- here("data", "inter", ROBUSTNESS_CHECKS[[check_name]])
  output_file <- create_output_filename(check_name)

  if (file.exists(annotations_file)) {
    tryCatch(
      {
        df <- merge_statements_covariates(
          annotations_file = annotations_file,
          output_file = output_file,
          verbose = TRUE
        )

        # Keep the merged data so the versions can be compared afterwards
        results[[check_name]] <- df
        message(sprintf(
          "Successfully processed %s: %d observations",
          check_name, nrow(df)
        ))
      },
      error = function(e) {
        # Report the failure and carry on with the next version
        message(sprintf("Error processing %s: %s", check_name, e$message))
      }
    )
  } else {
    message(sprintf(
      "Skipping %s: File not found at %s",
      check_name, annotations_file
    ))
  }
}

# Cross-version comparison, produced only when more than one annotation
# version was merged successfully.
if (length(results) > 1) {
  message("\n=== COMPARISON SUMMARY ===")

  # One row of headline statistics per annotation version; map_dfr() stacks
  # the rows and stores each list name in the `annotation_type` column.
  comparison <- map_dfr(
    results,
    function(df) {
      # Mean of x (NAs ignored) as a percentage, rounded to one decimal
      as_pct <- function(x) round(100 * mean(x, na.rm = TRUE), 1)

      data.frame(
        n_obs = nrow(df),
        n_projects = n_distinct(df$id),
        gave_statement_pct = as_pct(df$gave_statement),
        credit_biden_pct = as_pct(df$credit_biden),
        credit_ira_pct = as_pct(df$credit_ira),
        credit_bil_pct = as_pct(df$credit_bil),
        gives_credit_pct = as_pct(df$gives_credit)
      )
    },
    .id = "annotation_type"
  )

  message("Key statistics comparison:")
  print(comparison)

  # Persist the table next to the merged outputs
  comparison_file <- here("data", "output", "robustness_comparison.csv")
  write_csv(comparison, comparison_file)
  message(sprintf("Saved comparison table: %s", comparison_file))
}

# Final report. `results` only contains annotation versions whose merge
# completed without error, so skipped or failed versions are not listed.
# The paths are recomputed from the check names; presumably these are the
# files merge_statements_covariates() wrote via `output_file` -- confirm
# against merge.R.
message("\n=== MERGE ROBUSTNESS CHECK COMPLETE ===")
if (length(results) == 0) {
  # Avoid printing a "Files created:" header followed by nothing
  message("No files created: every annotation version was skipped or failed.")
} else {
  message("Files created:")
  for (check_name in names(results)) {
    output_file <- create_output_filename(check_name)
    message(sprintf("- %s: %s", check_name, output_file))
  }
}
